# -*- coding: utf-8 -*-

import datetime
import socket

import scrapy

from ..items import Ch01Item, MyItemLoader


class Quotes01Spider(scrapy.Spider):
    """
    Simple single-page spider demo.

    Requests the one URL listed in ``start_urls`` and hands the response to
    :meth:`parse`, which yields one loaded item per quote block on the page.
    """
    name = 'quotes01'
    allowed_domains = ['localhost']
    start_urls = [
        'http://localhost:8000/page/page1.html'
    ]

    def parse(self, response):
        """
        Scrape the quotes front page. Run with: scrapy crawl quotes01 -o items.json

        The lines starting with an at-sign below are Scrapy spider contracts
        (evaluated by ``scrapy check``), so their spelling matters.

        @url http://quotes.toscrape.com/
        @returns items 10
        @scrapes text author tags
        :param response: the downloaded page response
        :return: generator yielding one loaded item per quote found
        """
        # NOTE(review): the contract URL above targets the public site while
        # allowed_domains only lists localhost; depending on the Scrapy version
        # the offsite filter may drop the contract request -- confirm.
        for quote in response.xpath('//div[@class="col-md-8"]/div[@class="quote"]'):
            # Bind the loader to this quote's selector so the relative XPaths
            # below are evaluated against this quote only.
            loader = MyItemLoader(item=Ch01Item(), selector=quote)

            # Fields extracted from the quote markup.
            loader.add_xpath('text', './span[@class="text"]/text()')
            loader.add_xpath('author', './span/small[@class="author"]/text()')
            loader.add_xpath('tags', './div[@class="tags"]/a/text()')

            # Bookkeeping fields: where, by what and when the item was scraped.
            loader.add_value('url', response.url)
            loader.add_value('project', self.settings.get('BOT_NAME'))
            loader.add_value('spider', self.name)
            loader.add_value('server', socket.gethostname())
            loader.add_value('date', datetime.datetime.now())

            yield loader.load_item()
